#%%
import numpy as np
import pandas as pd
# Log file to analyse; swap the assignment below to switch to the other log.
path = 'acclog_Cora.txt'
# path='acclog_citeseer.txt'

# Read every line of the log up front. The ``with`` block guarantees the
# file handle is closed even if reading fails (the previous bare
# ``open(...).readlines()`` left closing to the garbage collector).
with open(path, 'r') as log_file:
    lines = log_file.readlines()
print(lines)
# %%

def _parse_log_line(line):
    """Turn one log line into a flat ``{column: float}`` record.

    The text between ``autotrain_`` and ``_/train`` (cut at the first
    ``clasteracc`` if one occurs there) is split on ``__``. In every
    resulting piece the last two ``_``-separated tokens are read as
    ``<name>_<value>``, and ``<value>`` is converted to ``float``.

    Two extra columns are added:
      * ``epoch`` - the number between ``epoch_em`` and ``_clasteracc``
      * ``acc``   - the token that directly follows ``clasteracc``

    Raises:
        IndexError: if an expected marker is missing from the line.
        ValueError: if a value cannot be converted to ``float``.
    """
    header = line.split('autotrain_')[1].split('_/train')[0].split('clasteracc')[0]
    record = {}
    for parm in header.split('__'):
        # Split once and reuse: second-to-last token is the name,
        # last token is the numeric value.
        tokens = parm.split('_')
        record[tokens[-2]] = float(tokens[-1])
    record['epoch'] = float(line.split('epoch_em')[1].split('_clasteracc')[0])
    record['acc'] = float(line.split('clasteracc')[1].split('_')[0])
    return record


# Collect all records first and build the frame once: concatenating inside
# the loop copies the growing frame on every iteration (quadratic) and gave
# every row the same index label 0. Blank lines (e.g. a trailing newline at
# the end of the log) are skipped instead of crashing the parser.
rows = [_parse_log_line(l) for l in lines if l.strip()]
r = pd.DataFrame(rows)
r
# %%
# Each cell below groups the parsed results in ``r`` by one column and
# shows, for every remaining column, the mean or the maximum per group.

# Mean per "embdim" value.
r.groupby("embdim").mean()
# %%
# Maximum per "embdim" value.
r.groupby("embdim").max()
# %%
# Mean per "enlarge" value.
r.groupby("enlarge").mean()
# %%
# Maximum per "enlarge" value.
r.groupby("enlarge").max()
# %%
# Mean per "perplexity" value.
r.groupby("perplexity").mean()
# %%
# Maximum per "perplexity" value.
r.groupby("perplexity").max()
# %%
# Mean per "p" value.
r.groupby("p").mean()
# %%
# Maximum per "p" value.
r.groupby("p").max()
# %%
# Mean per "v" value.
r.groupby("v").mean()
# %%
# Maximum per "v" value.
r.groupby("v").max()

# %%
